library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Read the career batting-and-fielding CSV from its absolute location on disk,
# keep the first twelve columns, and preview the result.
testBattingFielding <- read.csv(
  file = "D:\\Vishal\\III year\\Data Analytics\\Assignment II\\Player Statistics\\testcareerbattingandfielding.csv"
)
# Columns 1 through 12 run from Name to Stumpings (see the preview output).
df <- testBattingFielding[, seq_len(12)]
head(df, n = 6L)
## Name Matches Innings Not_Outs Runs High_Score
## 1 Aakash Shyamlal Chopra 10 19 0 437 60
## 2 Abhimanyu Mithun 4 5 0 120 46
## 3 Abhinav Mukund 5 10 0 211 62
## 4 Abraham Benjamin de Villiers 106 176 16 8074 278
## 5 Adam Charles Voges 15 21 7 1337 269
## 6 Adam Craig Gilchrist 96 137 20 5570 204
## Average No_Of_100 No_Of_50 Strike_Rate Catches_Taken Stumpings
## 1 23.00 0 2 34.60 15 NA
## 2 24.00 0 0 48.19 0 NA
## 3 21.10 0 1 40.73 5 NA
## 4 50.46 21 39 53.75 197 5
## 5 95.50 5 4 59.36 11 NA
## 6 47.60 17 26 81.98 379 37
# Discard every row that has a missing value in any of the kept columns, then
# show the first rows of what remains.
df <- stats::na.omit(object = df)
head(df, n = 6L)
## Name Matches Innings Not_Outs Runs High_Score
## 4 Abraham Benjamin de Villiers 106 176 16 8074 278
## 6 Adam Craig Gilchrist 96 137 20 5570 204
## 24 Bradley James Haddin 66 112 13 3265 169
## 26 Brendon Barrie McCullum 101 176 9 6453 302
## 83 Kamran Akmal 53 92 6 2648 158
## 90 Krishankumar Dinesh Karthik 23 37 1 1000 129
## Average No_Of_100 No_Of_50 Strike_Rate Catches_Taken Stumpings
## 4 50.46 21 39 53.75 197 5
## 6 47.60 17 26 81.98 379 37
## 24 32.97 4 18 58.44 262 8
## 26 38.64 12 31 64.60 198 11
## 83 30.79 6 12 63.10 184 22
## 90 27.77 1 7 50.00 51 5
# Print per-column summary statistics for df.
summary(object = df)
## Name Matches Innings
## Abraham Benjamin de Villiers: 1 Min. : 1.00 Min. : 2.00
## Adam Craig Gilchrist : 1 1st Qu.: 11.00 1st Qu.: 19.00
## Bradley James Haddin : 1 Median : 28.00 Median : 54.00
## Brendon Barrie McCullum : 1 Mean : 53.06 Mean : 86.06
## Kamran Akmal : 1 3rd Qu.: 96.00 3rd Qu.:144.00
## Krishankumar Dinesh Karthik : 1 Max. :146.00 Max. :233.00
## (Other) :11
## Not_Outs Runs High_Score Average
## Min. : 0.000 Min. : 56 Min. : 35.0 Min. :21.58
## 1st Qu.: 2.000 1st Qu.: 407 1st Qu.: 92.0 1st Qu.:30.31
## Median : 6.000 Median : 1546 Median :129.0 Median :33.80
## Mean : 8.294 Mean : 3172 Mean :154.2 Mean :36.08
## 3rd Qu.:16.000 3rd Qu.: 5498 3rd Qu.:204.0 3rd Qu.:38.64
## Max. :24.000 Max. :12400 Max. :319.0 Max. :57.40
##
## No_Of_100 No_Of_50 Strike_Rate Catches_Taken
## Min. : 0.000 Min. : 0.00 Min. :39.84 Min. : 4.0
## 1st Qu.: 0.000 1st Qu.: 2.00 1st Qu.:47.86 1st Qu.: 31.0
## Median : 2.000 Median :12.00 Median :53.75 Median : 57.0
## Mean : 6.706 Mean :16.47 Mean :55.41 Mean :143.8
## 3rd Qu.: 6.000 3rd Qu.:31.00 3rd Qu.:63.10 3rd Qu.:198.0
## Max. :38.000 Max. :52.00 Max. :81.98 Max. :530.0
##
## Stumpings
## Min. : 1.00
## 1st Qu.: 4.00
## Median : 5.00
## Mean :11.65
## 3rd Qu.:20.00
## Max. :38.00
##
# Cluster the players that have a recorded Stumpings value into three groups
# with k-means, then draw an interactive Matches-vs-Stumpings scatter plot
# coloured by cluster.

# kmeans() chooses its starting centres at random; fix the seed so the cluster
# assignment is reproducible.
set.seed(20)

# Rows with a recorded Stumpings value. Missingness is tested with is.na()
# rather than by comparing against the string 'NA', which only dropped the
# missing rows because the comparison itself evaluated to NA for them.
stumpingRows <- testBattingFielding %>%
  filter(!is.na(Stumpings))

# Clustering features: columns 2, 5 and 12 (Matches, Runs, Stumpings).
# NOTE(review): Runs takes part in the clustering although it is not plotted,
# and the features are not scaled, so the column with the largest range will
# weigh most in the distance computation -- confirm this is intended.
testStumping <- stumpingRows %>%
  select(2, 5, 12)

# Player names taken from exactly the rows that are clustered and plotted, so
# each hover label is guaranteed to belong to its point. The previous code used
# df$Name, but df is filtered on all twelve columns and can therefore have a
# different set of rows than testStumping.
playerNames <- stumpingRows$Name

testStumpingCluster <- kmeans(testStumping, 3)
# Store the assignment as a factor so plotly uses a discrete colour per cluster.
testStumpingCluster$cluster <- as.factor(testStumpingCluster$cluster)

plot_ly(
  testStumping,
  x = ~Matches,
  y = ~Stumpings,
  type = 'scatter',
  mode = 'markers',
  color = testStumpingCluster$cluster,
  text = paste('Name: ', playerNames)
) %>%
  # Title describes the plotted axes; the previous title referred to
  # wickets and economy, neither of which appears in this plot.
  layout(title = "Cluster of matches & stumpings")